## Estimation of a Multinomial Logit Model by Maximum Likelihood using mnlogit and Fixed point subroutine 
## reshufling people around to investigate how robust our aggregation is. indexexer <- c("social","civil")
## when indexexer is social, households arereshuffled within their same social
## by Jose-Alberto Guerra
##    Created   04/2014
##    Modified  24/06/2019 
#ProofRnew.dta comes from border_reg_weighted4v1.do, notice we have eliminated agricultural
#ProofRPlacebonew.dta comes from border_reg_weighted4Placebo.do (IDSocial==207 is river thames)
#Proof2RCoordnew.dta is Proof2Rnew but adding the coordinates for objectid_1 points. It comes from TemporyPointsCoordinatesv1.do
#Proof2RPlaceboCoordnew.dta is Proof2RPlacebonew but adding the coordinates for objectid_1 points. It comes from TemporyPointsCoordinatesv1.do
#to deal with spatial data visualitaion in R see http://spatial.ly/r/
##Check lines below Robustness, they should be commented out for our basic specification
## Where we guarantee that KS probabilities are within [0,1]
## We guarantee inner loop (fixed point expectations) convergence using tol 10e-10 for the sup-norm stopping criterion
## We follow KS 2012 and KS 2018 using outer loope iterations k=50 and guaranteeing inner loop convergence (see above)
###########################################################################
rm(list=ls(all=TRUE))
#########################
####    Preamble     ####
#########################
##Functions
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
#codes <- "/Users/josealbertoguerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/ReStat_R&R/Codes/C_ANALYSIS/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))  
source(paste(codes,"Mlogit_PlaceboCoordFunctions.R",sep=""))
###########################################################################
#1. DATA
###########################################################################
###########################################################################

set.seed(12345)
#set.seed(98765)
#define WD and DATASET
##If normal data set
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
#root <- "/Users/josealbertoguerra/Dropbox/NetworkParish2016/Occupational_Choice/"
mapdata <- paste(root,"MOLA/",sep="")
dataOGR <- paste(root,"MOLA/",sep="")
wddata<-c(paste(root,"Results/ReStat/Placebo/Coords/",sep=""),paste(root,"NAPP/Proof2RCoordnew.dta",sep="")) #If Placebo dataset #wddata<-c("C:/HPC/Results/Placebo","C:/HPC/NAPP_address/Proof2RPlacebo.dta")
setwd(wddata[1])
# set the Robust exercise, social: hh are randomly allocated to a new street within their same social
# set the Robust exercise, civil: hh are randomly allocated to a new street within their same civil
# set the Robust exercise, all: hh are randomly allocated to a new street across all London
allindexexer <- c("social") #c("all","civil","social")
for(indexexer in allindexexer) {
  indexexer <- "social" #c("social","civil","all")
  sink(paste("Explorenew",indexexer,".txt",sep=""))
  options(max.print=1000000)
  gc()
  
  #Calling maps
  map <- readShapeSpatial(paste(mapdata,"socialparishpolygonV1_LLPA.shp",sep=""))# however no spatial info
  
  line <- readShapeSpatial(paste(mapdata,"socialparishlineV1_LLPA.shp",sep=""))
  linec <- readShapeSpatial(paste(mapdata,"civilparishMOLAline.shp",sep=""))
  
  #Robust calling NAPP household head data
  NAPP<-read.dta(wddata[2],convert.factors=FALSE)
  #load pooleddatato keep exaclty the same sample as in our original study
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPneww50list.RData",sep="" ))
  HHstudied <- unique(pooleddata$ID)
  NAPP <- subset(NAPP,(HHNUMhead %in% HHstudied))
  colnames(NAPP)[colnames(NAPP)=="mergecoord"] <- "all"
  rm(pooleddata)
  
  #To sort data bys social and HHNUMhead
  index <- with(NAPP, order(social,HHNUMhead))  
  NAPP <- NAPP[index,]
  NAPP0 <- NAPP
  geovars <- c("longitude","latitude","objectid_1","social","civil","Buffer100","Buffer80","Buffer60","Buffer50","Buffer40","ID_socialborder")
  
  ii <- 2
  while(ii<101){
    print(ii)
    NAPP <- NAPP0
    ############
    # Placebo coords to randomly reshuffle households within all, civil or social parish
    NAPPindexrand <- ddply(NAPP, indexexer, function(df) { df$HHNUMhead <- df$HHNUMhead[sample(nrow(df))]; df})[,c("HHNUMhead",geovars)]
  
    indexa <- with(NAPPindexrand, order(HHNUMhead)) #to sort the geo data based on HHNUMhead
    indexalt <- with(NAPP, order(HHNUMhead)) #to sort the original data based on HHNUMhead
    NAPP[indexalt,geovars] <- NAPPindexrand[indexa,geovars] #To replace original xvars based on HHNUMhead but geovars based on random reshuffling
    index <- with(NAPP, order(social,HHNUMhead))  
    NAPP <- NAPP[index,]
    ############
    
    #Modifying NAPP
    NAPP$Buffer400<-1 #including column with buffer400
    
    ####################
    ##Data Definition##
    ####################
    
    ##Individual Variables
    
    #subset of pointsobs such that: civil parish != Noscials<2 and Social parishes such that n_p>=30
    NAPPest<-subset(NAPP,select=c(civil, social, HHNUMhead, SEX, AGE, married, nchild, nservant, stayerp, stayerc, classnew))
    #write.dta(NAPPest,paste(root,"NAPP/ReStat/Proof2RCoordPlaceboSubsamplenew",indexexer,".dta",sep=""))
    
    ###########################################################
    #00.a To keep only those with occupations and employed (see 00.b) #########
    #NAPPest<-read.dta(paste(root,"NAPP/ReStat/Proof2RCoordPlaceboSubsamplenew",indexexer,".dta",sep=""),convert.factors=FALSE)
    #NAPPest <- NAPPest[(ddply(NAPPest,.(social),transform, Nn =length(social))$Nn>=30),]
    NAPPest$classnew[NAPPest$classnew==1] <- 2 ##professional is now class 2
    NAPPest$classnew <- NAPPest$classnew-2
    #write.dta(NAPPest,paste(root,"NAPP/ReStat/Proof2RCoordPlaceboSubsamplelast",indexexer,".dta",sep=""))
    #############
    
    ##############################################
    #0.a Robustness: to keep only non movers #########
    #NAPPest <- subset(NAPPest,stayerc==1) ########
    #NAPPest <- NAPPest[(ddply(NAPPest,.(social),transform, Nn =length(social))$Nn>=30),]
    ##############################################
    
    class <- factor(NAPPest$classnew) #defining factors based on class 
    class.m <- model.matrix(~ -1 + class) #creating columnsdummy vars for each class category
    NAPPest <- cbind(NAPPest,class.m)
    colnames(NAPPest)[colnames(NAPPest) %in% c("civil","social","HHNUMhead","classnew")] <- c("IDCivil","IDSocial","ID","choice")
    
    
    PooledNAPPID <- NAPPest[(colnames(NAPPest) %in% c("IDCivil","IDSocial","ID","choice"))]
    #Subset of individual characteristics
    NAPPX <- NAPPest[!(colnames(NAPPest) %in% c("IDCivil","ID","choice"))]
    NAPPXlist <- split(NAPPX, list(NAPPX$IDSocial))
    NAPPXlist <- lapply(NAPPXlist,function(x) x <- as.matrix(x[!(colnames(x) %in% c("IDSocial"))])) #to convert data.frame to matrix
    
    #Geographic variables
    ###########################################################
    #00.a To keep only those with occupations and employed #########
    NAPPgeo <- subset(NAPP,select=c(longitude , latitude, social))
    #NAPPgeo <- NAPPgeo[(ddply(NAPPgeo,.(social),transform, Nn =length(social))$Nn>=30),]
    ##############################################
    #0.b Robustness: to keep only non movers #########
    #NAPPgeo <- subset(NAPP,!(civil %in% c.n.2s) & !(social %in% p.n.N) & stayerc==1,select=c(longitude , latitude, social)) ########
    #NAPPgeo <- NAPPgeo[(ddply(NAPPgeo,.(social),transform, Nn =length(social))$Nn>=30),]
    ##############################################
    
    colnames(NAPPgeo)[colnames(NAPPgeo) %in% c("longitude","latitude","social")] <- c("long","lati","IDSocial")
    coordslistNAPP  <- split(NAPPgeo, list(NAPPgeo$IDSocial))
    coordslistNAPP <- lapply(coordslistNAPP, function(x) x <- cbind(x[["long"]]+rnorm(nrow(x),0,0.1),x[["lati"]]+rnorm(nrow(x),0,0.1)) ) #adding a minor disturbance to original coordinates so sdep functions work fine similar to #jitterDupCoords(, max=0.01)
    coordslist <- coordslistNAPP
    
    
    ##############################################
    #1. Robustness weighting matrices 
    # Lists of neighbours per parish (Original weights)
    #dneighbours <- lapply(coordslistNAPP, atleast1nei ) 
    #Adjancecy matrix row normalised (style="W")
    #wlist<-lapply(dneighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
    ##get at most 10 neighbours
    #kneighbours <- lapply(coordslistNAPP, atmost10nei ) 
    #wklist<-lapply(kneighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
    ##within 50 mts
    d50neighbours <- lapply(coordslistNAPP, within50nei ) 
    w50list<-lapply(d50neighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
    ##within 100 mts
    #d100neighbours <- lapply(coordslistNAPP, within100nei ) 
    #w100list<-lapply(d100neighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
    
    #2. for the robustness checks
    ##if we want at most 10 neighbours
    #dneighbours <- kneighbours
    #wlist <- wklist 
    ##if we want within 50 mts
    dneighbours <- d50neighbours
    wlist <- w50list #if we want neighbours within 50mts
    ##if we want within 100 mts
    #dneighbours <- d100neighbours
    #wlist <- w100list #if we want neighbours within 100mts
    ##############################################
    
    #Combining lists
    NAPPXwlist <- mapply(weighting1,NAPPXlist,wlist) 
    NAPPXlist <- lapply(NAPPXlist,function(x) x <- as.data.frame(x)) #to convert  matrix to data.frame
    pooledNAPPXlist <- rbindlist(NAPPXlist)
    pooledNAPPXlist <- cbind(PooledNAPPID, pooledNAPPXlist)
    
    NAPPXwlist <- lapply(NAPPXwlist,function(x) x <- as.data.frame(x)) #to convert  matrix to data.frame
    pooledNAPPXwlist <- rbindlist(NAPPXwlist)
    attr(pooledNAPPXwlist,"class") <- "data.frame"
    
    
    pooleddatalist <- cbind(pooledNAPPXlist[ !(colnames(pooledNAPPXlist) %in% paste("class",c(min(pooledNAPPXlist$choice):max(pooledNAPPXlist$choice)),sep="")) ],pooledNAPPXwlist)
    colnames(pooleddatalist)[colnames(pooleddatalist) %in% paste("class",c(min(pooledNAPPXlist$choice):max(pooledNAPPXlist$choice)),"w",sep="") ] <- paste("sw.",c(min(pooledNAPPXlist$choice):max(pooledNAPPXlist$choice)),sep="")
    datalist <- split(pooleddatalist, list(pooleddatalist$IDSocial))
    
    #Poduce the long format and pooled data
    datalistlong <- lapply(datalist,function(x) tolong(x,"choice",c("sw.")))
    pooleddata <- rbindlist(datalistlong)
    
    #Produce long data for homogenoeus beliefs
    pooleddatalisthomo <- pooleddatalist
    pooleddatalisthomo[grep("w", names(pooleddatalisthomo),value=TRUE)] <-  ddply(pooledNAPPXlist, .(IDSocial), sapply, FUN = 'witaver')[c("SEX","AGE","married",  "nchild",   "nservant", "stayerp",  "stayerc",  "class0",   "class1",   "class2",   "class3",   "class4",   "class5")]
    
    pooleddatahomo <-  tolong(pooleddatalisthomo,"choice",c("sw."))
    #To save data paste(root,"Mola Data/",sep="")
    save(pooleddata, file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddataNAPPnew",indexexer,ii,".RData",sep="" )) #Data per parish in long format (sw) pooled together
    save(pooleddatahomo, file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddataNAPPhomonew",indexexer,ii,".RData",sep="" )) #Data per parish in long format (sw) pooled together for homogeneous beliefs
    save(pooleddatalist, file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddatalistNAPPnew",indexexer,ii,".RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together
    save(pooleddatalisthomo, file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddatalistNAPPhomonew",indexexer,ii,".RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together for the homogeneous case
    save(datalist, file=paste(root,"NAPP/ReStat/Placebo/Coords/datalistNAPPnew",indexexer,ii,".RData",sep="" )) #Lists of data per parish in wide format (sw.1,sw.2,...)
    save(coordslist, file=paste(root,"NAPP/ReStat/Placebo/Coords/coordslistNAPPnew",indexexer,ii,".RData",sep="" )) #Lists of coordinates per parish
    save(dneighbours, file=paste(root,"NAPP/ReStat/Placebo/Coords/dneighboursNAPPnew",indexexer,ii,".RData",sep="")) #Lists of neigbours identities per parish
    save(wlist, file=paste(root,"NAPP/ReStat/Placebo/Coords/wlistNAPPnew",indexexer,ii,".RData",sep="")) #Sparse Lists of neighbours per parish
  ii <- ii+1
  }
  sink()
}

###########################
###Heterogenous Beliefs (i.e. ``network'' interactions, correlated effects and fixed point beliefs subroutine)
###########################
rm(list=ls(all=TRUE))
set.seed(12345)
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
#root <- "/Users/josealbertoguerra/Dropbox/NetworkParish2016/Occupational_Choice/"
setwd(paste(root,"Results/ReStat/Placebo/Coords/",sep=""))
allindexexer <- c("social") #c("all","civil","social")
for(indexexer in allindexexer) {
  indexexer <- "social" #c("social","civil","all")
  #codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/ReStat_R&R/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
  codes <- paste(root,"C_ANALYSIS/",sep="")
  source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
  sink(paste("NAPPEstimationHete",indexexer,".txt",sep = ""))
  options(max.print=1000000)
  gc()
  ii <- 3 
  while(ii<101) {
    print(ii)
    #Load Data
    
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddataNAPPnew",indexexer,ii,".RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddataNAPPhomonew",indexexer,ii,".RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddatalistNAPPnew",indexexer,ii,".RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddatalistNAPPhomonew",indexexer,ii,".RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/datalistNAPPnew",indexexer,ii,".RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/coordslistNAPPnew",indexexer,ii,".RData",sep="" )) #coordslist, Lists of coordinates per parish
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/dneighboursNAPPnew",indexexer,ii,".RData",sep="")) #dneighbours, Lists of neigbours identities per parish
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/wlistNAPPnew",indexexer,ii,".RData",sep="")) #wlist, Sparse Lists of neighbours per parish
    
    print("Heterogeneous  beliefs: ``network'' interactions, correlated effects and fixed point beliefs subroutine")
    
    #define which variables we include in the estimation
    xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc"
    wvar <- "sw"
    
    ## Naive Estimation Heterogeneous
    ###################
  #  print("Naive Estimation")
  #  gc()
    #1. x_i  
  #  fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(xvars, collapse= "+"),"|", wvar ,sep="")))          
  #  ml.w <- mnlogit(fformula, pooleddata,"alt")
  #  save(ml.w,file=paste("Mlwnew",indexexer,".RData",sep=""))
  #  print("w, x_i")
  #  print(summary(ml.w))
    #2. x_i, x_p
    ##w
  #  fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep="")),"|", wvar ,sep="")))          
  #  ml.om.w <- mnlogit(fformula, pooleddata,"alt")
  #  save(ml.om.w,file=paste("Mlomwnew",indexexer,".RData",sep=""))
  #  print("w, x_i x_p")
  #  print(summary(ml.om.w))
    #3. x_i, x_p, t_b
    ##w
  #  fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
  #  ml.m.w <- mnlogit(fformula, pooleddata,"alt")
  #  save(ml.m.w,file=paste("Mlmwnew",indexexer,".RData",sep=""))
  #  print("w, x_i x_p t_b")
  #  print(summary(ml.m.w))
    
    #4. x_i, x_p, t_s
    ##w
    fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep="")))          
    ml.mt.w <- mnlogit(fformula, pooleddata,"alt")
    save(ml.mt.w,file=paste("Mlmtwnew",indexexer,ii,".RData",sep=""))
    print("w, x_i x_p t_p")
    print(summary(ml.mt.w))
    ii <- ii+1
  }
  sink()
}

## Nested Pseudo likelihood Estimation Heterogeneous
###################

## Nested Pseudo likelihood Estimation Heterogeneous ***WITH*** fe at parish level
source(paste(codes,"Mlogit_NAPPHeterofformula1PlaceboCoordinatesv3.R",sep=""))


